package com.heima.wemedia;


import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.springframework.util.CollectionUtils;

import java.io.IOException;

/**
 * Scratch tests that scrape article data from the 3g.163.com mobile news pages.
 *
 * <p>Before every test a Chrome browser is driven through Selenium to load the
 * "history" page and the resulting HTML is parsed into {@link #document}.
 * {@link #jsoupDemo()} separately fetches the "ent" page directly with jsoup
 * and prints each article's title, link and cover image paths.
 */
public class ReplicaDatas {
    /**
     * Page parsed from the browser-rendered HTML; reassigned by
     * {@link #initDriver()} before each test.
     *
     * <p>NOTE(review): the comment originally placed here said "mark all
     * crawler data as approved by directly calling the status-update method",
     * which does not match any code in this class — presumably stale; confirm.
     */
    Document document = null;

    /**
     * Loads the history page in Chrome and stores the parsed page source in
     * {@link #document}.
     *
     * <p>The browser is only needed long enough to grab the page source, so it
     * is shut down in a {@code finally} block. Without that, every test run
     * would leave a Chrome window and a chromedriver process behind.
     */
    @BeforeEach
    public void initDriver(){
        // Machine-specific location of the chromedriver binary.
        System.setProperty("webdriver.chrome.driver", "D:\\develop\\Google Chrome\\chromedriver.exe");
        WebDriver driver = new ChromeDriver();
        try {
            driver.get("https://3g.163.com/touch/news/sub/history/?ver=c&clickfrom=index2018_header_main");
            document = Jsoup.parse(driver.getPageSource());
        } finally {
            // Always release the browser, even if loading or parsing fails.
            driver.quit();
        }
    }


    /**
     * Fetches the "ent" page with jsoup and prints, for every {@code <article>}
     * element: the text of its first {@code <h3>}, the {@code href} of its
     * first {@code <a>}, and the {@code data-src} of each {@code <img>}.
     *
     * <p>This test works on its own freshly fetched page and does not read the
     * {@link #document} field.
     *
     * @throws IOException if the HTTP request made by jsoup fails
     */
    @Test
    public void jsoupDemo() throws IOException {
        Document page = Jsoup.connect("https://3g.163.com/touch/ent/?ver=c&clickfrom=index2018_header_main")
                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36")
                .get();

        // Find every element with the <article> tag.
        Elements articleEleList = page.getElementsByTag("article");
        for (Element element : articleEleList) {
            // Articles without a heading are skipped entirely.
            Elements h3List = element.getElementsByTag("h3");
            if (CollectionUtils.isEmpty(h3List)){
                continue;
            }
            Element h3Ele = h3List.get(0);
            System.out.println(h3Ele.text());

            // Articles without a link are skipped before their images are printed.
            Elements aList = element.getElementsByTag("a");
            if (CollectionUtils.isEmpty(aList)){
                continue;
            }
            String href = aList.get(0).attr("href");
            System.out.println(href);

            // Print the data-src attribute of every image in the article.
            Elements imgList = element.getElementsByTag("img");
            if (!CollectionUtils.isEmpty(imgList)){
                for (Element img : imgList) {
                    System.out.println("封面路径："+img.attr("data-src"));
                }
            }
        }
    }
}
